# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from cgnpc.rules import *
from datetime import datetime
import json
from zc_core.spiders.base import BaseSpider

class SkuSpider(BaseSpider):
    """Spider that walks the cgnpc mall category tree and harvests the SPU
    (product) listings of every level-3 catalog, one POST request per page.
    """
    name = 'spu'
    # Category-tree endpoint (GET).
    index_url = 'https://mall.cgnpc.com.cn/scm-cgn-oauth-web/obs/business/product/Catrgory/query?channelTypeCode=101'
    # Product-list endpoint (POST, JSON body).
    spu_list_url = 'https://mall.cgnpc.com.cn/scm-cgn-oauth-web/obs/business/product/ProductSearch/search'
    # Page size the remote search API pages by (matches the 'limit' field).
    LIST_PAGE_SIZE = 30

    def __init__(self, batchNo=None, *args, **kwargs):
        """batchNo: optional explicit batch number (int-like string); when
        omitted, a batch number is derived from the current time."""
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        if not batchNo:
            self.batch_no = time_to_batch_no(datetime.now())
        else:
            self.batch_no = int(batchNo)
        print("批次为:", self.batch_no)
        self.page_size = 20
        # Hard cap on follow-up pages requested per category (previously
        # defined but never applied; now enforced in parse_spu_content_deal).
        self.max_page_limit = 100

    def _build_list_req(self, url, catalog1Id, catalog1Name, catalog2Id, catalog2Name, catalog3Id, catalog3Name, page):
        """Build one POST request for page *page* of the SPU list of a
        level-3 catalog; the full catalog chain is carried in meta so the
        callback can attribute the results."""
        return Request(
            method='POST',
            url=url,
            meta={
                'reqType': 'spu',
                'batchNo': self.batch_no,
                'page': page,
                'catalog1Id': catalog1Id,
                'catalog1Name': catalog1Name,
                'catalog2Id': catalog2Id,
                'catalog2Name': catalog2Name,
                'catalog3Id': catalog3Id,
                'catalog3Name': catalog3Name,
            },
            headers={
                'Content-Type': 'application/json',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62',
            },
            # FIX: 'start' was hardcoded to 30 for every page, so the offset
            # never advanced with 'page' (and page 1 would have skipped the
            # first 30 items if the API honors 'start'). Compute the real
            # offset; if the API pages purely by 'currentPage' this is a
            # harmless no-op.
            body=json.dumps({"t": "font", "s": catalog3Id, "des": catalog3Name, "sort": "price@desc", "groups": [],
                             "currentPage": page,
                             "start": (page - 1) * self.LIST_PAGE_SIZE,
                             "limit": self.LIST_PAGE_SIZE}),
            callback=self.parse_spu_content_deal,
            errback=self.error_back,
            dont_filter=True,
        )

    def start_requests(self):
        # Kick off with the category tree; categories and brands hang off it.
        yield Request(
            url=self.index_url,
            meta={
                'reqType': 'spu',
                'batchNo': self.batch_no,

            },
            callback=self.parse_total_page,
            errback=self.error_back,
            dont_filter=True,
        )

    # Parse the category tree and fan out one list request per level-3 node.
    def parse_total_page(self, response):
        cats = parse_catalog(response)
        if not cats:
            return
        self.logger.info('品类: count[%s]' % len(cats))
        yield Box('catalog', self.batch_no, cats)

        # Index by catalogId once instead of re-scanning the whole list
        # three times per level-3 category (the original was O(n^2) and
        # raised IndexError when a parent was missing from the tree).
        by_id = {c.get('catalogId'): c for c in cats}
        for cat in cats:
            if cat.get('level') != 3:
                continue
            catalog3Id = cat.get('catalogId')
            catalog3Name = cat.get('catalogName')
            cat2 = by_id.get(cat.get('parentId'))
            if cat2 is None:
                self.logger.warning('missing level-2 parent for catalog %s', catalog3Id)
                continue
            catalog2Id = cat2.get('catalogId')
            catalog2Name = cat2.get('catalogName')
            cat1 = by_id.get(cat2.get('parentId'))
            if cat1 is None:
                self.logger.warning('missing level-1 parent for catalog %s', catalog2Id)
                continue
            catalog1Id = cat1.get('catalogId')
            catalog1Name = cat1.get('catalogName')
            yield self._build_list_req(self.spu_list_url, catalog1Id, catalog1Name, catalog2Id, catalog2Name,
                                       catalog3Id, catalog3Name, 1)

    # Parse one page of the SPU list; page 1 additionally fans out the
    # remaining pages.
    def parse_spu_content_deal(self, response):
        meta = response.meta
        catalog1Name = meta.get('catalog1Name')
        catalog1Id = meta.get('catalog1Id')
        catalog2Name = meta.get('catalog2Name')
        catalog2Id = meta.get('catalog2Id')
        catalog3Name = meta.get('catalog3Name')
        catalog3Id = meta.get('catalog3Id')
        cur_page = meta.get('page')
        self.logger.info("清单: catId=%s, catName=%s,current=%s" % (catalog3Id, catalog3Name, cur_page))
        spu_list = parse_spu(response)

        if spu_list:
            yield Box('spu', self.batch_no, spu_list)
            if cur_page == 1:
                # NOTE: this calls the module-level parse_total_page imported
                # via `from cgnpc.rules import *`, NOT the method of the same
                # name above — the method does not shadow module scope here.
                total_pages = parse_total_page(response)
                # Enforce the configured per-category cap so a bogus total
                # cannot flood the scheduler.
                total_pages = min(total_pages, self.max_page_limit)
                self.logger.info("清单1: catId=%s, catName=%s,total_page=%s" % (catalog3Id, catalog3Name, total_pages))
                for page in range(2, total_pages + 1):
                    yield self._build_list_req(self.spu_list_url, catalog1Id, catalog1Name, catalog2Id, catalog2Name,
                                               catalog3Id, catalog3Name, page)
        else:
            self.logger.info('空页: cat=%s, page=%s' % (catalog3Id, cur_page))
